# -*- coding: utf-8 -*-#
#-------------------------------------------------------------------------------
# 建立者:        博智科技  
# Name:         test
# Description:
# Author:       yzl
# Date:         2019-02-11
#-------------------------------------------------------------------------------

import requests
import re
from bs4 import BeautifulSoup,NavigableString

# Request headers that imitate a desktop Chrome browser.
headerdict = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    # Only advertise encodings that requests can always decode. Brotli ('br')
    # is decoded only when an optional brotli package is installed; without
    # it a br-compressed response would come back as unreadable bytes.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
}

# Search endpoint to request.
url = 'https://www.baidu.com/s'

# Query-string parameters; 'wd' carries the search keywords.
params = {'wd': '株洲it培训'}

# Fetch the search-results page. requests applies no timeout by default, so
# an explicit one keeps the script from hanging on a stalled connection.
r = requests.get(url, params=params, headers=headerdict, timeout=10)
# Stop on an HTTP error status instead of parsing an error page as results.
r.raise_for_status()
doc_html = r.text
# Parse the page content with BeautifulSoup, using the lxml parser.
soup = BeautifulSoup(doc_html, 'lxml')

# Main content area (the <div id="content_left"> element, or None if absent).
main_html = soup.find('div', id='content_left')

# Pagination area (the <div id="page"> element, or None if absent).
pages_html = soup.find('div', id='page')

# find() returns None when the element is missing from the response; iterate
# over nothing in that case instead of failing on None.children.
page_nodes = pages_html.children if pages_html is not None else ()
for tmp in page_nodes:
    # Skip text nodes. isinstance also covers NavigableString subclasses
    # (e.g. Comment), which have no tag attributes to inspect.
    if isinstance(tmp, NavigableString):
        continue
    print(type(tmp))
    print(tmp.name)
    d = tmp.attrs
    # <strong> children are skipped before the href lookup
    # (presumably the current-page marker, which is not a link - confirm).
    if tmp.name == 'strong':
        continue
    # Not every child tag is guaranteed to carry an href; print it only
    # when present rather than raising KeyError.
    href = d.get('href')
    if href is not None:
        print(href)

# Look up the link meant to lead to the next results page: the first <a>
# element whose CSS class is "n" (None when no such element exists).
# NOTE(review): assumes the first a.n on the page is the "next" link - confirm.
last_html = soup.find('a', class_='n')
# Re-parse that element's markup as a standalone document so the link can be
# reached as spa.a (its target would then be spa.a['href']).
spa = BeautifulSoup(markup=str(last_html), features='lxml')














